PyPI - ragaai-catalyst - Versions diffs - 2.1.7.5b5__py3-none-any.whl → 2.2__py3-none-any.whl - Mend

ragaai-catalyst 2.1.7.5b5__py3-none-any.whl → 2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

ragaai_catalyst/tracers/tracer.py CHANGED Viewed

@@ -142,6 +142,7 @@ class Tracer(AgenticTracing):
         self.start_time = datetime.datetime.now().astimezone().isoformat()
         self.model_cost_dict = model_cost
         self.user_context = ""  # Initialize user_context to store context from add_context
+        self.user_gt = ""  # Initialize user_gt to store gt from add_gt
         self.file_tracker = TrackName()
         self.post_processor = None
         self.max_upload_workers = max_upload_workers
@@ -178,22 +179,21 @@ class Tracer(AgenticTracing):
             logger.error(f"Failed to retrieve projects list: {e}")
             raise
-        if tracer_type == "langchain":
-            instrumentors = []
-            from openinference.instrumentation.langchain import LangChainInstrumentor
-            instrumentors += [(LangChainInstrumentor, [])]
-            self._setup_agentic_tracer(instrumentors)
-        elif tracer_type == "llamaindex":
-            self._upload_task = None
-            self.llamaindex_tracer = None
-        elif tracer_type == "rag/langchain":
-            instrumentors = []
-            from openinference.instrumentation.langchain import LangChainInstrumentor
-            instrumentors += [(LangChainInstrumentor, [])]
-            self._setup_agentic_tracer(instrumentors)
+        # if tracer_type == "langchain":
+        #     instrumentors = []
+        #     from openinference.instrumentation.langchain import LangChainInstrumentor
+        #     instrumentors += [(LangChainInstrumentor, [])]
+        #     self._setup_agentic_tracer(instrumentors)
+        # elif tracer_type == "llamaindex":
+        #     self._upload_task = None
+        #     self.llamaindex_tracer = None
+        # elif tracer_type == "rag/langchain":
+        #     instrumentors = []
+        #     from openinference.instrumentation.langchain import LangChainInstrumentor
+        #     instrumentors += [(LangChainInstrumentor, [])]
+        #     self._setup_agentic_tracer(instrumentors)
         # Handle agentic tracers
-        elif tracer_type == "agentic" or tracer_type.startswith("agentic/"):
+        if tracer_type == "agentic" or tracer_type.startswith("agentic/") or tracer_type == "langchain":
             # Setup instrumentors based on tracer type
             instrumentors = []
@@ -308,11 +308,11 @@ class Tracer(AgenticTracing):
                     return
             # Handle specific framework instrumentation
-            elif tracer_type == "agentic/llamaindex":
+            elif tracer_type == "agentic/llamaindex" or tracer_type == "llamaindex":
                 from openinference.instrumentation.llama_index import LlamaIndexInstrumentor
                 instrumentors += [(LlamaIndexInstrumentor, [])]
-            elif tracer_type == "agentic/langchain" or tracer_type == "agentic/langgraph":
+            elif tracer_type == "agentic/langchain" or tracer_type == "agentic/langgraph" or tracer_type == "langchain":
                 from openinference.instrumentation.langchain import LangChainInstrumentor
                 instrumentors += [(LangChainInstrumentor, [])]
@@ -378,6 +378,9 @@ class Tracer(AgenticTracing):
             "input_cost_per_token": float(cost_config["input_cost_per_million_token"])/ 1000000,
             "output_cost_per_token": float(cost_config["output_cost_per_million_token"]) /1000000
         }
+        self.dynamic_exporter.custom_model_cost = self.model_custom_cost
+        logger.info(f"Updated custom model cost for {model_name}: {self.model_custom_cost[model_name]}")
     def register_masking_function(self, masking_func):
         """
@@ -401,66 +404,27 @@ class Tracer(AgenticTracing):
         def recursive_mask_values(obj, parent_key=None):
             """Apply masking to all values in nested structure."""
-            if isinstance(obj, dict):
-                if self.tracer_type == "langchain":
-                    # Special handling for LangChain data
-                    if isinstance(obj, dict):
-                        if obj.get("name", "") == "retrieve_documents.langchain.workflow":
-                            prompt_structured_data = {
-                                "traceloop.entity.input": json.dumps({
-                                    "kwargs": {
-                                        "input": masking_func(json.loads(obj.get("attributes", {}).get("traceloop.entity.input", "")).get("kwargs", {}).get("input", "")),
-                                    }
-                                })
-                            }
-                            prompt_data = {
-                                "name": "retrieve_documents.langchain.workflow",
-                                "attributes": prompt_structured_data,
-                            }
-                            return prompt_data
-                        elif obj.get("name", "") == "PromptTemplate.langchain.task":
-                            context_structured_data = {
-                                "traceloop.entity.input": json.dumps({
-                                    "kwargs": {
-                                        "context": masking_func(json.loads(obj.get("attributes", {}).get("traceloop.entity.input", "")).get("kwargs", {}).get("context", "")),
-                                    }
-                                }),
-                                "traceloop.entity.output": json.dumps({
-                                    "kwargs": {
-                                        "text": masking_func(json.loads(obj.get("attributes", {}).get("traceloop.entity.output", "")).get("kwargs", {}).get("text", "")),
-                                    }
-                                })
-                            }
-                            context_data = {
-                                "name": "PromptTemplate.langchain.task",
-                                "attributes": context_structured_data,
-                            }
-                            return context_data
-                        elif obj.get("name", "") == "ChatOpenAI.langchain.task":
-                            response_structured_data = {"gen_ai.completion.0.content": masking_func(obj.get("attributes", {}).get("gen_ai.completion.0.content", "")),
-                                                        "gen_ai.prompt.0.content": masking_func(obj.get("attributes", {}).get("gen_ai.prompt.0.content", ""))}
-                            response_data = {
-                                "name": "ChatOpenAI.langchain.task",
-                                "attributes" : response_structured_data
-                            }
-                            return response_data
-                else:
+            try:
+                if isinstance(obj, dict):
                     return {k: recursive_mask_values(v, k) for k, v in obj.items()}
-            elif isinstance(obj, list):
-                return [recursive_mask_values(item, parent_key) for item in obj]
-            elif isinstance(obj, str):
-                # List of keys that should NOT be masked
-                excluded_keys = {
-                    'start_time', 'end_time', 'name', 'id',
-                    'hash_id', 'parent_id', 'source_hash_id',
-                    'cost', 'type', 'feedback', 'error', 'ctx','telemetry.sdk.version',
-                    'telemetry.sdk.language','service.name'
-                }
-                # Apply masking only if the key is NOT in the excluded list
-                if parent_key and parent_key.lower() not in excluded_keys:
-                    return masking_func(obj)
-                return obj
-            else:
+                elif isinstance(obj, list):
+                    return [recursive_mask_values(item, parent_key) for item in obj]
+                elif isinstance(obj, str):
+                    # List of keys that should NOT be masked
+                    excluded_keys = {
+                        'start_time', 'end_time', 'name', 'id',
+                        'hash_id', 'parent_id', 'source_hash_id',
+                        'cost', 'type', 'feedback', 'error', 'ctx','telemetry.sdk.version',
+                        'telemetry.sdk.language','service.name'
+                    }
+                    # Apply masking only if the key is NOT in the excluded list
+                    if parent_key and parent_key.lower() not in excluded_keys:
+                        return masking_func(obj)
+                    return obj
+                else:
+                    return obj
+            except Exception as e:
+                logger.error(f"Error masking value: {e}")
                 return obj
         def file_post_processor(original_trace_json_path: os.PathLike) -> os.PathLike:
@@ -535,20 +499,8 @@ class Tracer(AgenticTracing):
             'max_upload_workers': self.max_upload_workers
         }
-        # Save the model_custom_cost before reinitialization
-        saved_model_custom_cost = self.model_custom_cost.copy()
-        # Reinitialize self with new external_id and stored parameters
-        self.__init__(
-            external_id=external_id,
-            **current_params
-        )
-        # Restore the model_custom_cost after reinitialization
-        self.model_custom_cost = saved_model_custom_cost
-        self.dynamic_exporter.custom_model_cost = self.model_custom_cost
+        self.dynamic_exporter.external_id = external_id
+        logger.debug(f"Updated external_id to {external_id}")
     def set_dataset_name(self, dataset_name):
         """
@@ -646,8 +598,11 @@ class Tracer(AgenticTracing):
             super().start()
             return self
         elif self.tracer_type == "llamaindex":
-            self.llamaindex_tracer = LlamaIndexInstrumentationTracer(self._pass_user_data())
-            return self.llamaindex_tracer.start()
+            super().start()
+            return self
+            # self.llamaindex_tracer = LlamaIndexInstrumentationTracer(self._pass_user_data())
+            # return self.llamaindex_tracer.start()
         elif self.tracer_type == "rag/langchain":
             super().start()
             return self
@@ -661,35 +616,39 @@ class Tracer(AgenticTracing):
             super().stop()
             return self
         elif self.tracer_type == "llamaindex":
-            if self.llamaindex_tracer is None:
-                raise ValueError("LlamaIndex tracer was not started")
-            user_detail = self._pass_user_data()
-            converted_back_to_callback = self.llamaindex_tracer.stop()
+            super().stop()
+            return self
-            filepath_3 = os.path.join(os.getcwd(), "llama_final_result.json")
-            with open(filepath_3, 'w') as f:
-                json.dump(converted_back_to_callback, f, default=str, indent=2)
-            # Apply post-processor if registered
-            if self.post_processor is not None:
-                try:
-                    final_trace_filepath = self.post_processor(filepath_3)
-                    logger.debug(f"Post-processor applied successfully, new path: {filepath_3}")
-                except Exception as e:
-                    logger.error(f"Error in post-processing: {e}")
-            else:
-                final_trace_filepath = filepath_3
-            if converted_back_to_callback:
-                UploadTraces(json_file_path=final_trace_filepath,
-                             project_name=self.project_name,
-                             project_id=self.project_id,
-                             dataset_name=self.dataset_name,
-                             user_detail=user_detail,
-                             base_url=self.base_url
-                             ).upload_traces()
-            return
+            # if self.llamaindex_tracer is None:
+            #     raise ValueError("LlamaIndex tracer was not started")
+            # user_detail = self._pass_user_data()
+            # converted_back_to_callback = self.llamaindex_tracer.stop()
+            # filepath_3 = os.path.join(os.getcwd(), "llama_final_result.json")
+            # with open(filepath_3, 'w') as f:
+            #     json.dump(converted_back_to_callback, f, default=str, indent=2)
+            # # Apply post-processor if registered
+            # if self.post_processor is not None:
+            #     try:
+            #         final_trace_filepath = self.post_processor(filepath_3)
+            #         logger.debug(f"Post-processor applied successfully, new path: {filepath_3}")
+            #     except Exception as e:
+            #         logger.error(f"Error in post-processing: {e}")
+            # else:
+            #     final_trace_filepath = filepath_3
+            # if converted_back_to_callback:
+            #     UploadTraces(json_file_path=final_trace_filepath,
+            #                  project_name=self.project_name,
+            #                  project_id=self.project_id,
+            #                  dataset_name=self.dataset_name,
+            #                  user_detail=user_detail,
+            #                  base_url=self.base_url
+            #                  ).upload_traces()
+            # return
         elif self.tracer_type == "rag/langchain":
             super().stop()
         else:
@@ -697,7 +656,7 @@ class Tracer(AgenticTracing):
     def get_upload_status(self):
         """Check the status of the trace upload."""
-        if self.tracer_type == "langchain":
+        if self.tracer_type == "langchain" or self.tracer_type == "llamaindex":
             if self._upload_task is None:
                 return "No upload task in progress."
             if self._upload_task.done():
@@ -861,6 +820,7 @@ class Tracer(AgenticTracing):
             post_processor= self.post_processor,
             max_upload_workers = self.max_upload_workers,
             user_context = self.user_context,
+            user_gt = self.user_gt,
             external_id=self.external_id
         )
@@ -904,33 +864,44 @@ class Tracer(AgenticTracing):
         Args:
             context: Additional context information to be added to the trace. Can be a string.
-        Raises:
-            ValueError: If tracer_type is not 'langchain' or 'llamaindex'.
         """
         if self.tracer_type not in ["langchain", "llamaindex"]:
-            raise ValueError("add_context is only supported for 'langchain' and 'llamaindex' tracer types")
+            logger.warning("add_context is only supported for 'langchain' and 'llamaindex' tracer types")
+            return
         # Convert string context to string if needed
         if isinstance(context, str):
             self.dynamic_exporter.user_context = context
             self.user_context = context
         else:
-            raise TypeError("context must be a string")
+            logger.warning("context must be a string")
-    def add_metadata(self, metadata):
+    def add_gt(self, gt):
         """
-        Add metadata information to the trace. This method is only supported for 'langchain' and 'llamaindex' tracer types.
+        Add gt information to the trace. This method is only supported for 'langchain' and 'llamaindex' tracer types.
         Args:
-            metadata: Additional metadata information to be added to the trace. Can be a dictionary.
-        Raises:
-            ValueError: If tracer_type is not 'langchain' or 'llamaindex'.
+            gt: gt information to be added to the trace. Can be a string.
         """
         if self.tracer_type not in ["langchain", "llamaindex"]:
-            raise ValueError("add_metadata is only supported for 'langchain' and 'llamaindex' tracer types")
+            logger.warning("add_gt is only supported for 'langchain' and 'llamaindex' tracer types")
+            return
+        # Convert string gt to string if needed
+        if isinstance(gt, str):
+            self.dynamic_exporter.user_gt = gt
+            self.user_gt = gt
+        else:
+            logger.warning("gt must be a string")
+    def add_metadata(self, metadata):
+        """
+        Add metadata information to the trace. If metadata is a dictionary, it will be merged with existing metadata.
+        Non-dictionary metadata or keys not present in the existing metadata will be logged as warnings.
+        Args:
+            metadata: Additional metadata information to be added to the trace. Should be a dictionary.
+        """
         # Convert string metadata to string if needed
         user_details = self.user_details
         user_metadata = user_details["trace_user_detail"]["metadata"]
@@ -939,8 +910,8 @@ class Tracer(AgenticTracing):
                 if key in user_metadata:
                     user_metadata[key] = value
                 else:
-                    raise ValueError(f"Key '{key}' not found in metadata")
+                    logger.warning(f"Key '{key}' not found in metadata")
             self.dynamic_exporter.user_details = user_details
             self.metadata = user_metadata
         else:
-            raise TypeError("metadata must be a dictionary")
+            logger.warning("metadata must be a dictionary")

ragaai_catalyst/tracers/utils/rag_extraction_logic_final.py ADDED Viewed

@@ -0,0 +1,205 @@
+import logging
+logger = logging.getLogger(__name__)
+import json
+def rag_trace_json_converter(input_trace):
+    tracer_type = input_trace.get("tracer_type")
+    input_trace = input_trace.get("data", [])[0].get("spans", [])
+    def get_prompt(input_trace):
+        try:
+            if tracer_type == "langchain":
+                for span in input_trace:
+                    try:
+                        attributes = span.get("attributes", {})
+                        if attributes:
+                            for key, value in attributes.items():
+                                try:
+                                    if key.startswith("llm.input_messages.") and key.endswith(".message.role") and value == "user":
+                                        message_num = key.split(".")[2]
+                                        content_key = f"llm.input_messages.{message_num}.message.content"
+                                        if content_key in attributes:
+                                            return attributes.get(content_key)
+                                except Exception as e:
+                                    logger.warning(f"Error processing attribute key-value pair: {str(e)}")
+                                    continue
+                            for key, value in attributes.items():
+                                try:
+                                    if key.startswith("llm.prompts") and isinstance(value, list):
+                                        human_message = None
+                                        for message in value:
+                                            if isinstance(message, str):
+                                                human_index = message.find("Human:")
+                                                if human_index != -1:
+                                                    human_message = message[human_index:].replace("Human:", "")
+                                                    break
+                                        return human_message if human_message else value
+                                except Exception as e:
+                                    logger.warning(f"Error processing attribute key-value pair for prompt: {str(e)}")
+                                    continue
+                    except Exception as e:
+                        logger.warning(f"Error processing span for prompt extraction: {str(e)}")
+                        continue
+                for span in input_trace:
+                    try:
+                        if span["name"] == "LLMChain":
+                            try:
+                                input_value = span["attributes"].get("input.value", "{}")
+                                return json.loads(input_value).get("question", "")
+                            except json.JSONDecodeError:
+                                logger.warning(f"Invalid JSON in LLMChain input.value: {input_value}")
+                                continue
+                        elif span["name"] == "RetrievalQA":
+                            return span["attributes"].get("input.value", "")
+                        elif span["name"] == "VectorStoreRetriever":
+                            return span["attributes"].get("input.value", "")
+                    except Exception as e:
+                        logger.warning(f"Error processing span for fallback prompt extraction: {str(e)}")
+                        continue
+                logger.warning("No user message found in any span")
+                logger.warning("Returning empty string for prompt.")
+                return ""
+            elif tracer_type == "llamaindex":
+                for span in input_trace:
+                    if span["name"] == "BaseQueryEngine.query":
+                        return span["attributes"]["input.value"]
+                    elif "query_bundle" in span["attributes"].get("input.value", ""):
+                        try:
+                            query_data = json.loads(span["attributes"]["input.value"])
+                            if "query_bundle" in query_data:
+                                return query_data["query_bundle"]["query_str"]
+                        except json.JSONDecodeError:
+                            logger.error("Failed to parse query_bundle JSON")
+            logger.error("Prompt not found in the trace")
+            return None
+        except Exception as e:
+            logger.error(f"Error while extracting prompt from trace: {str(e)}")
+            return None
+    def get_response(input_trace):
+        try:
+            if tracer_type == "langchain":
+                for span in input_trace:
+                    try:
+                        attributes = span.get("attributes", {})
+                        if attributes:
+                            for key, value in attributes.items():
+                                try:
+                                    if key.startswith("llm.output_messages.") and key.endswith(".message.content"):
+                                        return value
+                                except Exception as e:
+                                    logger.warning(f"Error processing attribute key-value pair for response: {str(e)}")
+                                    continue
+                            for key, value in attributes.items():
+                                try:
+                                    if key.startswith("output.value"):
+                                        try:
+                                            output_json = json.loads(value)
+                                            if "generations" in output_json and isinstance(output_json.get("generations"), list) and len(output_json.get("generations")) > 0:
+                                                if isinstance(output_json.get("generations")[0], list) and len(output_json.get("generations")[0]) > 0:
+                                                    first_generation = output_json.get("generations")[0][0]
+                                                    if "text" in first_generation:
+                                                        return first_generation["text"]
+                                        except json.JSONDecodeError:
+                                            logger.warning(f"Invalid JSON in output.value: {value}")
+                                            continue
+                                except Exception as e:
+                                    logger.warning(f"Error processing attribute key-value pair for response: {str(e)}")
+                                    continue
+                    except Exception as e:
+                        logger.warning(f"Error processing span for response extraction: {str(e)}")
+                        continue
+                for span in input_trace:
+                    try:
+                        if span["name"] == "LLMChain":
+                            try:
+                                output_value = span["attributes"].get("output.value", "")
+                                if output_value:
+                                    return json.loads(output_value)
+                                return ""
+                            except json.JSONDecodeError:
+                                logger.warning(f"Invalid JSON in LLMChain output.value: {output_value}")
+                                continue
+                        elif span["name"] == "RetrievalQA":
+                            return span["attributes"].get("output.value", "")
+                        elif span["name"] == "VectorStoreRetriever":
+                            return span["attributes"].get("output.value", "")
+                    except Exception as e:
+                        logger.warning(f"Error processing span for fallback response extraction: {str(e)}")
+                        continue
+                logger.warning("No response found in any span")
+                return ""
+            elif tracer_type == "llamaindex":
+                for span in input_trace:
+                    if span["name"] == "BaseQueryEngine.query":
+                        return span["attributes"]["output.value"]
+            logger.error("Response not found in the trace")
+            return None
+        except Exception as e:
+            logger.error(f"Error while extracting response from trace: {str(e)}")
+            return None
+    def get_context(input_trace):
+        try:
+            if tracer_type == "langchain":
+                for span in input_trace:
+                    try:
+                        if span["name"] == "CustomContextSpan":
+                            return span["attributes"].get("input.value", "")
+                        elif span["name"] == "VectorStoreRetriever":
+                            return span["attributes"].get("retrieval.documents.1.document.content", "")
+                    except Exception as e:
+                        logger.warning(f"Error processing span for context extraction: {str(e)}")
+                        continue
+            elif tracer_type == "llamaindex":
+                for span in input_trace:
+                    try:
+                        if span["name"] == "CustomContextSpan":
+                            return span["attributes"].get("input.value", "")
+                        elif span["name"] == "BaseRetriever.retrieve":
+                            return span["attributes"]["retrieval.documents.1.document.content"]
+                    except Exception as e:
+                        logger.warning(f"Error processing span for context extraction: {str(e)}")
+                        continue
+            logger.warning("Context not found in the trace")
+            return ""
+        except Exception as e:
+            logger.error(f"Error while extracting context from trace: {str(e)}")
+            return ""
+    def get_gt(input_trace):
+        try:
+            if tracer_type == "langchain":
+                for span in input_trace:
+                    try:
+                        if span["name"] == "CustomGroundTruthSpan":
+                            return span["attributes"].get("input.value", "")
+                    except Exception as e:
+                        logger.warning(f"Error processing span for ground truth extraction: {str(e)}")
+                        continue
+            elif tracer_type == "llamaindex":
+                for span in input_trace:
+                    try:
+                        if span["name"] == "CustomGroundTruthSpan":
+                            return span["attributes"].get("input.value", "")
+                    except Exception as e:
+                        logger.warning(f"Error processing span for ground truth extraction: {str(e)}")
+                        continue
+            logger.warning("Ground truth not found in the trace")
+            return ""
+        except Exception as e:
+            logger.error(f"Error while extracting ground truth from trace: {str(e)}")
+            return ""
+    prompt = get_prompt(input_trace)
+    response = get_response(input_trace)
+    context = get_context(input_trace)
+    gt = get_gt(input_trace)
+    return prompt, response, context, gt

ragaai-catalyst 2.1.7.5b5__py3-none-any.whl → 2.2__py3-none-any.whl

ragaai-catalyst 2.1.7.5b5__py3-none-any.whl → 2.2__py3-none-any.whl